# === load relevant libraries
library(data.table)
library(ggplot2)
library(DescTools)
library(lfe)

# === Figure 4(a). Fund flow, by style
rm(list = ls())  # start this figure from an empty workspace (attached packages stay loaded)

# get monthly style-level fund flow: within each month x category cell, average
# `flow` using `aum_1` as weights, keeping only rows with sectorFund == 0
# NOTE(review): aum_1 is presumably lagged assets under management -- confirm
data = readRDS('input_data/monthly_fund_data.RDS')
data = data[sectorFund == 0, list(flow = weighted.mean(flow, aum_1)), by = list(yyyymm, category)]

# demean: subtract each month's unweighted average across categories.
# Done in place by group rather than through a merge and a scratch column;
# setkey() then orders the rows by month.
data[, flow := flow - mean(flow), by = yyyymm]
setkey(data, yyyymm)

# convert to quarterly frequency: label every month with the last month of its
# calendar quarter (yyyymm is treated as a numeric YYYYMM code, e.g. 200207 -> 200209)
# and add up the monthly flows within each quarter x category cell
data[, yyyyqq := yyyymm - yyyymm %% 100 + 3 * ceiling((yyyymm %% 100) / 3)]
data = data[, list(flow = sum(flow)), by = list(yyyyqq, category)]

# plotting date = first day of the quarter's last month
data[, date := as.Date(as.character(100*yyyyqq+1), format = '%Y%m%d')]

# Plot the demeaned quarterly flow, one coloured line per category.
# A dotted vertical reference line is drawn at 2002-09-01; the y axis is
# formatted as whole percentages and the untitled legend sits on top.
ggplot(data, aes(x = date, y = flow, color = category)) +
  geom_line() +
  geom_vline(xintercept = as.Date('2002-09-01'), col = 2, lty = 3, lwd = 1) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 1)) +
  labs(x = NULL, y = 'Quarterly fund flow') +
  theme_classic() +
  theme(legend.position = 'top', legend.title = element_blank())


# === Figure 4(b). Regression coefficient of fund flows on ratings

rm(list = ls())

# get 36 lags of past returns as controls (columns ret_1, ..., ret_36).
# Lag i is the same fund's `ret` taken i positions earlier in the sorted list
# of months present in the data, so it equals "i calendar months ago" only if
# no month is missing from the panel as a whole.
data = readRDS('input_data/monthly_fund_data.RDS')
yyyymms = sort(unique(data[, yyyymm]))
for (i in seq_len(36)){
  # month -> month i positions later; head()/tail() with a negative count give
  # empty vectors (hence all-NA lags) instead of a backwards-running index
  # when the panel has i or fewer months
  tmp = data.table(yyyymm = head(yyyymms, -i), yyyymm_next = tail(yyyymms, -i))
  data = merge(data, tmp, by = 'yyyymm', all.x = TRUE)
  # self-join: relabel every observation with the month i positions ahead, so
  # that its `ret` lands on that later row of the same fund
  data = merge(data, data[, list(yyyymm = yyyymm_next, fundno, xx = ret)], by = c('yyyymm','fundno'), all.x = TRUE)
  setnames(data, 'xx', paste0('ret_', i))
  data[, yyyymm_next := NULL]
}
# keep complete rows only; note this checks every column of the table, not
# just the variables used in the regression below
data = data[0 == rowSums(is.na(data))]
rm(i, yyyymms)

# Estimate flow response to ratings for each cross-section
#
# Regresses `flow` on `rating_1` plus the lagged-return controls
# ret_1, ..., ret_nLags, using the rows of the global table `data` whose
# yyyymm equals thisM.
#   thisM : month identifier selecting the cross-section
#   nLags : number of lagged-return controls in the regression (default 36)
# Returns a one-row data.table with yyyymm = thisM and coef = the estimated
# coefficient on rating_1.
p.getOne = function(thisM, nLags = 36){
  # subsetting already produces a new table, so no explicit copy() is needed
  tmp = data[yyyymm == thisM]
  # flow ~ rating_1 + ret_1 + ... + ret_nLags, generated rather than typed out
  spec = reformulate(c('rating_1', paste0('ret_', seq_len(nLags))), response = 'flow')
  fit = felm(spec, data = tmp)
  # use the full component name: `fit$coef` relied on partial matching by `$`
  return(data.table(yyyymm = thisM, coef = fit$coefficients['rating_1', 1]))
}

# run the cross-sectional regression month by month and stack the one-row
# results; rbindlist() binds the whole list in one pass instead of copying the
# accumulated table at every step
yyyymms = sort(unique(data[, yyyymm]))
data = rbindlist(lapply(yyyymms, p.getOne))
rm(yyyymms, p.getOne)

# compute rolling 36 month averages and standard errors
# idx = row position of each monthly estimate (months are in ascending order)
data[, idx := seq_len(.N)]
n = 36  # window length, in monthly estimates

# Summarise the window of n monthly estimates that ends at position `this`.
#   this : value of `idx` closing the window
# Reads the globals `data` (columns yyyymm, coef, idx) and `n` (window length).
# Returns a one-row data.table holding the window's latest yyyymm, the mean
# coefficient, and sd(coef)/sqrt(n) as its standard error.
p.summarizeOne = function(this){
  window = data[idx %in% seq(this - n + 1, this)]
  window[, list(yyyymm = max(yyyymm),
                coef = mean(coef),
                se = sd(coef)/sqrt(n))]
}

# slide the n-row window over every position that has a full history
stopifnot(nrow(data) >= n)  # otherwise n:nrow(data) would run backwards
data = rbindlist(lapply(n:nrow(data), p.summarizeOne))
# drop helpers; `tmp` is the month-mapping table left over from building the lags
rm(n, p.summarizeOne, tmp)
gc()
# plotting date = first day of the window's last month
data[, date := as.Date(as.character(yyyymm*100+1), format = '%Y%m%d')]

# Plot the rolling mean coefficient with a shaded band of +/- 1.96 standard
# errors (the conventional 95% width). A dotted vertical reference line is
# drawn at 2002-07-01; the visible y range is zoomed to [0, 0.008] and shown
# as percentages with one decimal.
ggplot(data, aes(x = date, y = coef)) +
  geom_line(lwd = 1) +
  geom_ribbon(aes(ymin = coef-1.96*se, ymax = coef+1.96*se), alpha = .2) +
  geom_vline(xintercept = as.Date('2002-07-01'), lty = 3, col = 2, lwd = 1) +
  scale_y_continuous(labels = scales::percent_format(accuracy = 0.1)) +
  coord_cartesian(ylim = c(0, .008)) +
  labs(x = NULL, y = 'Flow response to ratings') +
  theme_classic()

